/*

Step 1: figure out entry dates of various drugs (manually verify)


Step 2: create list of users who are involved in each quasi-experiment

*/









/*
Step 1: use raw claims data to figure out entry dates for all drugs
*/

set more off

// Work from the claims directory for the whole step. The directory change is
// done BEFORE the accumulator file is seeded so that the empty seed file is
// created in the same place the loop later appends from and saves to.
// (Seeding first and changing directory afterwards leaves the seed in the
// launch directory, and the first append then either fails or silently picks
// up a stale entry_dates_cholesterol left behind by an earlier run.)
cd "/disk/aging/mktscan/nongeo/data/100pct"

// Seed the accumulator: an empty dataset holding only generic_id, so that the
// first `append` inside the loop has a file to read.
clear
gen generic_id = 0
save entry_dates_cholesterol, replace


// One pass per yearly claims file ccaed1996 ... ccaed2013.
forvalues j=1996/2013 {
	clear
	// Only the columns used below are loaded.
	use genind ndcnum svcdate using ccaed`j'

	// Attach drug identifiers and indication; unmatched claims are discarded.
	merge m:1 ndcnum using "compressed_redbook_w_indications", keep(mat) keepus(generic_id indic_id xr)
	drop _merge

	// only cholesterol
	keep if indic_id == 5

	// genind is converted to numeric (non-numeric values become missing and
	// are dropped by the range filter); codes 4 and 5 are flagged as generic.
	destring genind, replace force
	keep if genind >= 1 & genind <= 5

	gen generic = (genind == 4) | (genind == 5)
	drop genind

	// A "drug" is a (generic_id, generic, xr) combination.
	egen drug_id = group(generic_id generic xr) // NOTE: drug ID not comparable across datasets

	// new filter (based on Zocor issue):
	// discard claims in any drug x week-of-year cell with fewer than 5 claims
	gen week = week(svcdate)
	bysort drug_id week: gen long s = _N
	drop if s < 5
	drop s week

	// manual filter for 1997 and 1998: every claim in those two files is
	// treated as non-generic; likewise generic_id 6190 in the 2002 file.
	// NOTE(review): presumably the generic flag is unreliable in these
	// cases -- confirm against the manual verification notes.
	replace generic = 0 if `j' == 1997 | `j' == 1998
	replace generic = 0 if `j' == 2002 & generic_id == 6190

	// Earliest surviving claim date and claim count per drug within this year.
	gen entries = 1
	collapse (min) svcdate (first) indic_id generic_id generic xr (count) entries, by(drug_id)

	rename svcdate entry_date
	drop drug_id

	// Fold this year into the running result: keep the earliest entry date
	// and the total claim count per (generic_id, generic, xr).
	append using entry_dates_cholesterol

	collapse (min) entry_date (first) indic_id (sum) entries, by(generic_id generic xr)

	save entry_dates_cholesterol, replace

}



// Human-readable version of the entry-date table: attach the drug names from
// generic_name_mapping, keeping only generic_ids present in both datasets.
merge m:1 generic_id using "generic_name_mapping", keep(match) nogenerate

sort indic_id entry_date

save entry_dates_cholesterol_long, replace




// Apply the manual corrections, number the entry events, and store one wide
// row per indication.
use entry_dates_cholesterol_long, clear

compress

// This generic extended-release combination is removed outright.
drop if gennme == "Niacin/Simvastatin" & generic == 1 & xr == 1

// Hand-set generic entry dates (non-extended-release versions only).
replace entry_date = mdy(6,23,2006) if gennme == "Simvastatin" & generic == 1 & xr == 0 
replace entry_date = mdy(11,30,2011) if gennme == "Atorvastatin Calcium" & generic == 1 & xr == 0 

// two months burn-in period: entries dated before 1 March 1996 are discarded
drop if entry_date < mdy(3,1,1996)


// number the entry events: 1, 2, ... in date order within each indication
bysort indic_id (entry_date): gen seq = _n

// largest event number over the whole table; later steps read it as a loop bound
summarize seq, meanonly
gen max_seq = r(max)

// one row per indication with entry_date1, generic_id1, generic1, xr1, ...
drop entries prodnme gennme
reshape wide entry_date generic_id generic xr, i(indic_id) j(seq)

save entry_dates_cholesterol_fixed, replace










/*
Step 2

Use the cholesterol claims data to construct patient pool for each quasi-experiment
*/

set seed 17 // fixed seed so the random draw below is reproducible

// Baseline window: a user is "treated" for entry event i when their first use
// falls 0-180 days after that entry, and a "control" when it falls 1-180 days
// before it.
use user_first_cholesterol_dates, clear

rename first_use_date first_date

gen indic_id = 5

// Attach the wide table of entry events to every user.
merge m:1 indic_id using entry_dates_cholesterol_fixed
keep if _merge == 3 // should all merge
drop _merge

// max_seq holds the number of entry events in the wide table.
summ max_seq
local loop_end = r(max)

forvalues i=1/`loop_end' {
	// days from entry event i to the user's first use
	gen diff = first_date - entry_date`i' 
	gen treat`i' = 1 if diff >= 0 & diff <= 180
	gen control`i' = 1 if diff < 0 & diff >= -180

	tab treat`i'
	tab control`i'
	drop diff
}

// in the form user id, indication, entering drug characteristics, treatment/control
display _N

keep enrolid treat* control* generic_id* generic* xr* entry_date*

// reshape to long (one row per user x entry event), drop irrelevant entries
reshape long treat control generic_id generic xr entry_date, i(enrolid) j(entry_num)

drop if treat == . & control == . // user is outside the window of this event

gen indic_id = 5

// summarize: how many users fall into more than one event window
duplicates tag enrolid, gen(t)
tab t
drop t

// Keep one randomly chosen event per user: the row with the smallest draw k.
// bysort ... (k) is used rather than sort + egen tag(), because egen
// functions may re-sort the data and so need not respect a preceding sort
// on k; with bysort the kept row is fully determined by the seeded draws.
gen k = runiform()
bysort enrolid (k): keep if _n == 1

save user_treatment_indicators_cholesterol, replace




// Alternative (shorter) window: same construction as the baseline indicators
// but with a 60-day window on either side of each entry event.

// 1) Set up the individual-level data
use user_first_cholesterol_dates, clear

rename first_use_date first_date

gen indic_id = 5

// Attach the wide table of entry events to every user.
merge m:1 indic_id using entry_dates_cholesterol_fixed
keep if _merge == 3 // should all merge
drop _merge

// max_seq holds the number of entry events in the wide table.
summ max_seq
local loop_end = r(max)

forvalues i=1/`loop_end' {
	// days from entry event i to the user's first use
	gen diff = first_date - entry_date`i' 
	
	gen treat_alt`i' = 1 if diff >= 0 & diff <= 60
	gen control_alt`i' = 1 if diff < 0 & diff >= -60
	
	//gen treat_alt_short`i' = 1 if diff >= 0 & diff <= 30
	//gen control_alt_short`i' = 1 if diff < 0 & diff >= -30
	
	tab treat_alt`i'
	tab control_alt`i'
	drop diff
}

// in the form user id, indication, entering drug characteristics, treatment/control
display _N

keep enrolid treat* control* generic_id* generic* xr* entry_date*

// reshape to long (one row per user x entry event), drop irrelevant entries
reshape long treat_alt control_alt generic_id generic xr entry_date, i(enrolid) j(entry_num)

drop if treat_alt == . & control_alt == . // user is outside the window of this event

gen indic_id = 5

// summarize: how many users fall into more than one event window
duplicates tag enrolid, gen(t)
tab t
drop t

// Keep one randomly chosen event per user: the row with the smallest draw k.
// bysort ... (k) is used rather than sort + egen tag(), because egen
// functions may re-sort the data and so need not respect a preceding sort
// on k; with bysort the kept row is fully determined by the random draws.
gen k = runiform()
bysort enrolid (k): keep if _n == 1

save user_treatment_indicators_cholesterol_alt_window, replace






// Longer windows: same construction as the baseline indicators but with an
// 1800-day window on either side of each entry event.
use user_first_cholesterol_dates, clear

rename first_use_date first_date

gen indic_id = 5

// Attach the wide table of entry events to every user.
merge m:1 indic_id using entry_dates_cholesterol_fixed
keep if _merge == 3 // should all merge
drop _merge

// max_seq holds the number of entry events in the wide table.
summ max_seq
local loop_end = r(max)

forvalues i=1/`loop_end' {
	// days from entry event i to the user's first use
	gen diff = first_date - entry_date`i' 
	
	gen treat_extra`i' = 1 if diff >= 0 & diff <= 1800
	gen control_extra`i' = 1 if diff < 0 & diff >= -1800
	
	tab treat_extra`i'
	tab control_extra`i'
	drop diff
}

// in the form user id, indication, entering drug characteristics, treatment/control
display _N

keep enrolid treat* control* generic_id* generic* xr* entry_date*

// reshape to long (one row per user x entry event), drop irrelevant entries
reshape long treat_extra control_extra generic_id generic xr entry_date, i(enrolid) j(entry_num)

drop if treat_extra == . & control_extra == . // user is outside the window of this event

gen indic_id = 5

// summarize: how many users fall into more than one event window
duplicates tag enrolid, gen(t)
tab t
drop t

// Keep one randomly chosen event per user: the row with the smallest draw k.
// bysort ... (k) is used rather than sort + egen tag(), because egen
// functions may re-sort the data and so need not respect a preceding sort
// on k; with bysort the kept row is fully determined by the random draws.
gen k = runiform()
bysort enrolid (k): keep if _n == 1

save user_treatment_indicators_cholesterol_long_control, replace

